import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.data import Field,TabularDataset,BucketIterator
import spacy
import numpy as np

# Field definitions
# TEXT tokenizes with spaCy's small English model. include_lengths=True makes
# each batch's `text` attribute a (token tensor, lengths) pair, which the model
# needs in order to pack padded sequences.
TEXT = Field(
    tokenize='spacy',
    tokenizer_language='en_core_web_sm',
    include_lengths=True,
)

# LABEL is a single float per example, the target type BCEWithLogitsLoss needs.
# NOTE(review): assumes the label column already holds numeric 0/1 values.
# If it holds strings such as "pos"/"neg", switch to a vocabulary-backed label
# field and call build_vocab on it -- confirm against the CSV files.
LABEL = Field(sequential=False, use_vocab=False, dtype=torch.float)

# Load the datasets
# `fields` pairs each CSV column, in order, with the Field that processes it.
# NOTE(review): assumes column order is text, label and that the files have no
# header row; pass skip_header=True if they do -- confirm against the data.
train_data, test_data = TabularDataset.splits(
    path='./data',
    train='train.csv',
    test='test.csv',
    format='csv',
    fields=[('text', TEXT), ('label', LABEL)],
)

# Build the vocabulary from the training split only (max_size=25000) and
# attach the pretrained 100-dimensional GloVe vectors to it.
TEXT.build_vocab(
    train_data,
    max_size=25000,
    vectors="glove.6B.100d",
)

"""
Model construction
LSTM model architecture
"""
# Model implementation


class SentimentLSTM(nn.Module):
    """Bidirectional, multi-layer LSTM classifier over token-index sequences.

    The input is embedded, run through the LSTM as a packed sequence, and the
    final hidden states of the last layer's two directions are concatenated
    and projected to ``output_dim`` logits (no activation is applied here).

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each token embedding.
        hidden_dim: Hidden size of the LSTM, per direction.
        output_dim: Number of output logits per sequence.
        n_layers: Number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_dim,
            num_layers=n_layers,
            bidirectional=True,
        )
        # Two directions are concatenated before the linear layer.
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.dropout = nn.Dropout(0.5)

    def forward(self, text, text_lengths):
        """Return logits of shape ``(batch, output_dim)``.

        Args:
            text: Long tensor of token indices, shape ``(seq_len, batch)``
                (the LSTM is built without ``batch_first``).
            text_lengths: 1-D tensor with the unpadded length of each sequence.
        """
        embedded = self.dropout(self.embedding(text))
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded,
            # Lengths are moved to the CPU before packing.
            text_lengths.to('cpu'),
            # Do not require callers to pre-sort the batch by length.
            enforce_sorted=False,
        )
        _, (hidden, _) = self.lstm(packed_embedded)
        # hidden[-2] / hidden[-1] are the last layer's forward / backward
        # final states; torch.cat takes them as a single sequence argument.
        hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1))
        return self.fc(hidden)
"""
Model training
"""
# Hyperparameters
INPUT_DIM = len(TEXT.vocab)  # size of the vocabulary built from the training set
EMBEDDING_DIM = 100  # matches the 100-dimensional GloVe vectors on TEXT.vocab
HIDDEN_DIM = 256
OUTPUT_DIM = 1  # one logit per example, as BCEWithLogitsLoss expects
N_LAYERS = 2

# Initialise the model
model = SentimentLSTM(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, N_LAYERS)

# Seed the embedding layer with the pretrained vectors attached to the
# vocabulary; without this they are loaded by build_vocab but never used.
if TEXT.vocab.vectors is not None:
    model.embedding.weight.data.copy_(TEXT.vocab.vectors)

# Optimizer and loss function
# parameters must be *called*: Adam needs the iterable of tensors, not the
# bound method object.
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()

# Training loop
def train(model, iterator, optimizer, criterion):
    """Run one optimisation pass over every batch in ``iterator``.

    Each batch must expose ``text`` as a ``(tokens, lengths)`` pair and
    ``label`` as the target passed to ``criterion``.

    Returns:
        A ``(loss, acc)`` pair: the summed per-batch loss and the summed
        per-batch ``accuracy`` result, each divided by ``len(iterator)``.
    """
    model.train()  # switch the model to training mode
    running_loss = 0
    running_acc = 0

    for batch in iterator:
        tokens, lengths = batch.text
        logits = model(tokens, lengths).squeeze(1)
        batch_loss = criterion(logits, batch.label)

        # Standard step: clear old gradients, backpropagate, update weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        running_acc += accuracy(logits, batch.label)

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches

def evaluate(model, iterator, criterion):
    """Compute loss and accuracy over ``iterator`` without updating the model.

    Each batch must expose ``text`` as a ``(tokens, lengths)`` pair and
    ``label`` as the target passed to ``criterion``.

    Returns:
        A ``(loss, acc)`` pair: the summed per-batch loss and the summed
        per-batch ``accuracy`` result, each divided by ``len(iterator)``.
    """
    epoch_loss = 0
    epoch_acc = 0

    model.eval()  # switch the model to evaluation mode

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch in iterator:
            text, text_lengths = batch.text
            predictions = model(text, text_lengths).squeeze(1)
            loss = criterion(predictions, batch.label)
            epoch_loss += loss.item()
            epoch_acc += accuracy(predictions, batch.label)

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

# Accuracy computation
def accuracy(preds, y):
    """Return the fraction of predictions that match ``y``.

    ``preds`` are raw logits: they are passed through a sigmoid and rounded
    to 0/1 before being compared element-wise with ``y``. The result is a
    tensor holding the number of matches divided by ``len`` of the inputs.
    """
    hard_labels = torch.sigmoid(preds).round()
    hits = (hard_labels == y).float()
    return hits.sum() / len(hits)

"""
Model application
"""
# spaCy pipeline used to tokenize raw sentences at prediction time. It is the
# same model name that TEXT is configured with.
nlp = spacy.load('en_core_web_sm')


# Predict the sentiment of new text
def predict_sentiment(model, sentence):
    """Return the model's sigmoid score (a float in [0, 1]) for ``sentence``.

    The sentence is tokenized with spaCy, mapped to indices through
    ``TEXT.vocab.stoi`` and fed to ``model`` as a batch of one.

    Note: this switches ``model`` to evaluation mode and leaves it there.

    Raises:
        ValueError: If ``sentence`` yields no tokens (an empty sequence
            cannot be packed for the LSTM).
    """
    tokenized = [tok.text for tok in nlp.tokenizer(sentence)]
    if not tokenized:
        raise ValueError("sentence must contain at least one token")

    # NOTE(review): out-of-vocabulary tokens rely on stoi's default lookup
    # (presumably the unknown-token index) -- confirm for the torchtext
    # version in use.
    indexed = [TEXT.vocab.stoi[t] for t in tokenized]

    # Put the input on whatever device the model's parameters live on.
    device = next(model.parameters()).device
    # unsqueeze(1) adds the batch dimension: (seq_len,) -> (seq_len, 1).
    tensor = torch.LongTensor(indexed).unsqueeze(1).to(device)
    length_tensor = torch.LongTensor([len(indexed)])

    # Disable dropout and gradient tracking so the score is deterministic.
    model.eval()
    with torch.no_grad():
        prediction = torch.sigmoid(model(tensor, length_tensor))
    return prediction.item()

# Example predictions
# NOTE(review): no call to train() is visible before this point in the file,
# so these scores come from whatever weights `model` currently holds.
positive_review = "This movie was fantastic! I really enjoyed it."
negative_review = 'The film was terrible and boring'

print(f'Positive review score:{predict_sentiment(model,positive_review):.4f}')
# Score the negative example here (the positive one was being scored twice).
print(f"Negative review score:{predict_sentiment(model,negative_review):.4f}")




